Executed: Mon Mar 27 11:45:02 2017

Duration: 12 seconds.

Leakage Coefficient Summary

This notebook summarize the leakage coefficient fitted from 4 dsDNA samples.

Import software


In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from cycler import cycler
import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_format='retina'  # for hi-dpi displays

In [2]:
figure_size = (5, 4)
default_figure = lambda: plt.subplots(figsize=figure_size)
save_figures = True

def savefig(filename, **kwargs):
    if not save_figures:
        return
    import os
    dir_ = 'figures/'
    kwargs_ = dict(dpi=300, bbox_inches='tight')
                   #frameon=True, facecolor='white', transparent=False)
    kwargs_.update(kwargs)
    plt.savefig(dir_ + filename, **kwargs_)

Data files


In [3]:
bsearch_str = 'DexDem'
leakage_kde = pd.read_csv(
    'results/Multi-spot - leakage coefficient all values KDE %s.csv' % bsearch_str, index_col=0)
leakage_gauss = pd.read_csv(
    'results/Multi-spot - leakage coefficient all values gauss %s.csv' % bsearch_str, index_col=0)
nbursts = pd.read_csv(
    'results/Multi-spot - leakage coefficient all values nbursts %s.csv' % bsearch_str, index_col=0)
for df in (leakage_kde, leakage_gauss, nbursts):
    df.columns.name = 'Channel'

In [4]:
for dx in (leakage_gauss, leakage_kde, nbursts):
    dx.columns = pd.Index(np.arange(1, 9), name='Spot')

In [5]:
PLOT_DIR = './figure/'

Plot style


In [6]:
bmap = sns.color_palette("Set1", 9)
colors_dark = np.array(bmap)[(1,0,2,3,4,8), :]
colors_dark4 = np.array(bmap)[(1,0,2,8), :]

bmap = sns.color_palette('Paired', 12)
colors_light = np.array(bmap)[(0,4,2,8,6,10), :]
colors_light4 = np.array(bmap)[(0,4,2,8), :]
colors_light[-1] = colors_light4[-1] = [.8, .8, .8]

colors_paired = np.zeros((colors_dark.shape[0]*2, colors_dark.shape[1]))
colors_paired[::2] = colors_dark
colors_paired[1::2] = colors_light
colors_paired4 = colors_paired[(0, 1, 2, 3, 4, 5, 10, 11), :]
sns.palplot(colors_paired4)



In [7]:
sns.set(style='ticks', font_scale=1.4, palette=colors_paired)

In [8]:
fig, ax = plt.subplots()
ax2 = ax.twinx()

kws = dict(lw=2, marker='o', ms=8)
for i, did in enumerate(('7d', '12d', '17d', 'DO')):
    (100*leakage_kde).loc[did].plot(label='%s KDE' % did, ax=ax, color=colors_light4[i], **kws)
    nbursts.loc[did].plot(ax=ax2, ls='--', lw=2.5, color=colors_dark4[i])

for i, did in enumerate(('7d', '12d', '17d', 'DO')):    
    (100*leakage_gauss).loc[did].plot(label='%s Gauss' % did, ax=ax, color=colors_dark4[i], **kws)
    
handles, lab = ax.get_legend_handles_labels()
h = handles#[1::2] + handles[::2]
l = lab[1::2] + lab[::2]
ax.legend(ncol=2, loc=1, bbox_to_anchor=(1, 0.5), borderaxespad=0.)
ax.set_ylim(0)

ax2.set_ylim(0, 3200)
plt.xlim(0.75, 8.25)
plt.xlabel('Channel')
ax.set_ylabel('Leakage %')
ax2.set_ylabel('# Bursts')
sns.despine(offset=10, trim=True, right=False)
savefig('multi-spot leakage KDE vs Gauss.svg')



In [9]:
# bmap = sns.color_palette("Set1", 9)
# colors = np.array(bmap)[(1,0,2,3,4,8,6,7), :]
# sns.set_palette(colors)
# sns.palplot(colors)

In [10]:
sns.swarmplot(data=leakage_kde);
plt.figure()
sns.swarmplot(data=leakage_kde.T);


Average leakage

Mean per sample:


In [11]:
lk_s = pd.DataFrame(index=['mean', 'std'], columns=leakage_kde.index, dtype=float)

lk_s.loc['mean'] = leakage_kde.mean(1)*100
lk_s.loc['std'] = leakage_kde.std(1)*100
lk_s = lk_s.round(5)
lk_s


Out[11]:
Sample 7d 12d 17d DO
mean 3.78862 3.53979 3.67895 3.19141
std 0.20973 0.40173 0.33983 0.16503

Mean per sample (weighted on the number of bursts):

Number of bursts in D-only population:


In [12]:
nbursts


Out[12]:
Spot 1 2 3 4 5 6 7 8
Sample
7d 318 329 288 504 465 365 397 393
12d 197 199 177 235 237 203 223 217
17d 144 178 119 205 190 165 171 189
DO 1637 1583 1221 1893 1979 1789 1851 1995

In [13]:
leakage_kde


Out[13]:
Spot 1 2 3 4 5 6 7 8
Sample
7d 0.038206 0.041016 0.038637 0.038637 0.039501 0.036484 0.034340 0.036269
12d 0.029654 0.043188 0.034982 0.035840 0.032205 0.033699 0.035840 0.037775
17d 0.040150 0.040366 0.041016 0.034340 0.037560 0.032631 0.033485 0.034768
DO 0.030715 0.035840 0.031353 0.032205 0.031140 0.030928 0.031566 0.031566

In [14]:
lk_sw = pd.DataFrame(index=['mean', 'std'], columns=leakage_kde.index, dtype=float)

lk_sw.loc['mean'] = (nbursts*leakage_kde).sum(1)/nbursts.sum(1)*100
lk_sw.loc['std'] = np.sqrt((((leakage_kde.T*100 - lk_sw.loc['mean']).T**2) * nbursts).sum(1) / (nbursts.sum(1) - 1))
#lk_sw['mean'] = (nbursts * lk_sw).sum(1) / nbursts.sum(1).sum()
lk_sw = lk_sw.round(5)
lk_sw


Out[14]:
Sample 7d 12d 17d DO
mean 3.78606 3.53753 3.65209 3.18770
std 0.19519 0.36862 0.30638 0.14829

In [15]:
lk_swg = pd.DataFrame(index=['mean', 'std'], columns=leakage_gauss.index, dtype=float)

lk_swg.loc['mean'] = (nbursts*leakage_gauss).sum(1)/nbursts.sum(1)*100
lk_swg.loc['std'] = np.sqrt((((leakage_gauss.T*100 - lk_swg.loc['mean']).T**2) * nbursts).sum(1) / (nbursts.sum(1) - 1))
#lk_sw['mean'] = (nbursts * lk_sw).sum(1) / nbursts.sum(1).sum()
lk_swg = lk_swg.round(5)
lk_swg


Out[15]:
Sample 7d 12d 17d DO
mean 3.97789 3.59402 3.57203 3.15309
std 0.21081 0.37405 0.31756 0.15469

In [16]:
lk_sw_m = pd.concat((lk_sw.loc['mean'], lk_swg.loc['mean']), axis=1, keys=['KDE', 'Gauss'])
lk_sw_m


Out[16]:
KDE Gauss
Sample
7d 3.78606 3.97789
12d 3.53753 3.59402
17d 3.65209 3.57203
DO 3.18770 3.15309

In [17]:
lk_sw_s = pd.concat((lk_sw.loc['std'], lk_swg.loc['std']), axis=1, keys=['KDE', 'Gauss'])
lk_sw_s


Out[17]:
KDE Gauss
Sample
7d 0.19519 0.21081
12d 0.36862 0.37405
17d 0.30638 0.31756
DO 0.14829 0.15469

In [18]:
sns.set_style('ticks')

In [19]:
lk_sw_m.plot(yerr=lk_sw_s, lw=5, alpha=0.6)
plt.xlim(-0.2, 3.2)
plt.xticks(range(4), lk_sw_s.index)
sns.despine(trim=True, offset=10)



In [20]:
lk_sw_m.plot.bar(yerr=lk_sw_s, alpha=0.8)
sns.despine(trim=True, offset=10)



In [21]:
sns.swarmplot(data=leakage_kde*100, size=8, palette=colors_dark);
plt.ylim(0)
lk_sw_m.loc[:,'KDE'].plot(lw=3, alpha=0.8, color='k')
plt.xlim(-0.2, 3.2)
plt.xticks(range(4), lk_sw_s.index)
sns.despine(trim=True, offset=10)


Mean per channel:


In [22]:
lk_c = pd.DataFrame(index=['mean', 'std'], columns=leakage_kde.columns, dtype=float)

lk_c.loc['mean'] = leakage_kde.mean()*100
lk_c.loc['std'] = leakage_kde.std()*100
#lk_c['mean'] = lk_c.mean(1)
lk_c = lk_c.round(5)
lk_c


Out[22]:
Spot 1 2 3 4 5 6 7 8
mean 3.46812 4.01025 3.64970 3.52555 3.51015 3.34355 3.38078 3.50945
std 0.52705 0.30872 0.42331 0.27031 0.40613 0.23308 0.17835 0.26534

Mean per channel (weighted on the number of bursts):


In [23]:
lk_cw = pd.DataFrame(index=['mean', 'std'], columns=leakage_kde.columns, dtype=float)

lk_cw.loc['mean'] = (nbursts*leakage_kde).sum()/nbursts.sum()*100
lk_cw.loc['std'] = np.sqrt((((leakage_kde*100 - lk_cw.loc['mean'])**2) * nbursts).sum(0) / (nbursts.sum(0) - 1))
#lk_cw['mean'] = lk_cw.mean(1)
lk_cw = lk_cw.round(5)
lk_cw


Out[23]:
Spot 1 2 3 4 5 6 7 8
mean 3.22532 3.75747 3.35081 3.38030 3.30070 3.20666 3.24678 3.29263
std 0.33302 0.26705 0.33578 0.24944 0.32652 0.19952 0.14574 0.22249

In [24]:
lk_cwg = pd.DataFrame(index=['mean', 'std'], columns=leakage_gauss.columns)

lk_cwg.loc['mean'] = (nbursts*leakage_gauss).sum()/nbursts.sum()*100
lk_cwg.loc['std'] = np.sqrt((((leakage_kde*100 - lk_cwg.loc['mean'])**2) * nbursts).sum(0) / (nbursts.sum(0) - 1))
#lk_cwg['mean'] = lk_cwg.mean(1)
lk_cwg = lk_cwg.round(5)
lk_cwg


Out[24]:
Spot 1 2 3 4 5 6 7 8
mean 3.24535 3.78637 3.33695 3.37488 3.2711 3.22545 3.2527 3.30717
std 0.333624 0.268609 0.336062 0.249497 0.327863 0.200399 0.145862 0.222962

In [25]:
lk_cw_m = pd.concat((lk_cw.loc['mean'], lk_cwg.loc['mean']), axis=1, keys=['KDE', 'Gauss'])
lk_cw_m.T


Out[25]:
Spot 1 2 3 4 5 6 7 8
KDE 3.22532 3.75747 3.35081 3.3803 3.3007 3.20666 3.24678 3.29263
Gauss 3.24535 3.78637 3.33695 3.37488 3.2711 3.22545 3.2527 3.30717

In [26]:
lk_cw_s = pd.concat((lk_cw.loc['std'], lk_cwg.loc['std']), axis=1, keys=['KDE', 'Gauss'])
lk_cw_s.T


Out[26]:
Spot 1 2 3 4 5 6 7 8
KDE 0.33302 0.26705 0.33578 0.24944 0.32652 0.19952 0.14574 0.22249
Gauss 0.333624 0.268609 0.336062 0.249497 0.327863 0.200399 0.145862 0.222962

In [27]:
sns.set_palette(colors_dark)

In [28]:
kws = dict(lw=5, marker='o', ms=8, alpha=0.5)
lk_cw.loc['mean'].plot(yerr=lk_cw.loc['std'], **kws)
lk_cwg.ix['mean',:].plot(yerr=lk_cwg.loc['std'],**kws)
plt.ylim(0, 4)
plt.xlim(0.75, 8.25)
sns.despine(trim=True)



In [29]:
lk_cw_m.plot.bar(alpha=0.8)
#sns.despine(trim=True, offset=10)


Out[29]:
<matplotlib.axes._subplots.AxesSubplot at 0x11b16ab00>

Transform table in tidy form:


In [30]:
leakage_kde_t = pd.melt(leakage_kde.reset_index(), id_vars=['Sample'], 
                        value_name='leakage_kde').apply(pd.to_numeric, errors='ignore')
leakage_kde_t.leakage_kde *= 100
leakage_kde_t.head()


Out[30]:
Sample Spot leakage_kde
0 7d 1 3.8206
1 12d 1 2.9654
2 17d 1 4.0150
3 DO 1 3.0715
4 7d 2 4.1016

In [31]:
_ = lk_cw_m.copy().assign(Spot=range(1, 9)).set_index('Spot')
_.head()


Out[31]:
KDE Gauss
Spot
1 3.22532 3.24535
2 3.75747 3.78637
3 3.35081 3.33695
4 3.38030 3.37488
5 3.30070 3.2711

In [32]:
sns.set_palette(colors_dark4)

In [33]:
sns.swarmplot(x='Spot', y='leakage_kde', data=leakage_kde_t, size=6, hue='Sample');
_ = lk_cw_m.copy().assign(Spot=range(8)).set_index('Spot')
_.loc[:,'KDE'].plot(lw=3, alpha=0.8, color='k')
plt.ylim(0)
plt.xlim(-0.25, 7.25)
sns.despine(trim=True)


NOTE: There is a per-channel trend that cannot be ascribed to the background because we performend a D-emission burst search and selection and the leakage vs ch does not resemble the D-background vs channel curve.

The effect is probably due to slight PDE variations (detectors + optics) that slightly change $\gamma$ on a per-spot basis.

Weighted mean of the weighted mean


In [34]:
leakage_kde_wmean = (leakage_kde*nbursts).sum().sum() / nbursts.sum().sum()
leakage_kde_wmean


Out[34]:
0.033399198942959708

Figure


In [35]:
%config InlineBackend.figure_format='retina'  # for hi-dpi displays

In [36]:
leakage_kde


Out[36]:
Spot 1 2 3 4 5 6 7 8
Sample
7d 0.038206 0.041016 0.038637 0.038637 0.039501 0.036484 0.034340 0.036269
12d 0.029654 0.043188 0.034982 0.035840 0.032205 0.033699 0.035840 0.037775
17d 0.040150 0.040366 0.041016 0.034340 0.037560 0.032631 0.033485 0.034768
DO 0.030715 0.035840 0.031353 0.032205 0.031140 0.030928 0.031566 0.031566

In [37]:
leakage_kde_t = pd.melt((100*leakage_kde).reset_index(), id_vars=['Sample'], 
                        value_name='leakage_kde').apply(pd.to_numeric, errors='ignore')
leakage_kde_t.head()


Out[37]:
Sample Spot leakage_kde
0 7d 1 3.8206
1 12d 1 2.9654
2 17d 1 4.0150
3 DO 1 3.0715
4 7d 2 4.1016

In [38]:
# leakage_kde_t = pd.melt((100*leakage_kde).T.reset_index(), id_vars=['Spot'], 
#                         value_name='leakage_kde').apply(pd.to_numeric, errors='ignore')
# leakage_kde_t.head()

In [39]:
sns.set_palette(colors_dark4)

In [40]:
fig, ax = plt.subplots(1, 2, figsize=(12, 5), sharey=True)
plt.subplots_adjust(wspace=0.1)

sns.swarmplot(x='Sample', y='leakage_kde', data=leakage_kde_t, size=6, ax=ax[0])
lk_sw_m.loc[:,'KDE'].plot(lw=3, alpha=0.8, color='k', ax=ax[0])
ax[0].set_ylim(0)
ax[0].set_xlim(-0.2, 3.2)
plt.xticks(range(4), lk_sw_s.index)
sns.despine(trim=True, offset=10, ax=ax[0])

sns.swarmplot(x='Spot', y='leakage_kde', data=leakage_kde_t, size=6, hue='Sample', ax=ax[1])
_ = lk_cw_m.copy().assign(Spot=range(8)).set_index('Spot')
_.loc[:,'KDE'].plot(lw=3, alpha=0.8, color='k', label='mean')
ax[1].set_ylim(0)
ax[1].set_xlim(-0.25, 7.25)
plt.xticks(np.arange(8));
sns.despine(trim=True, offset=10, ax=ax[1], left=True)
ax[1].yaxis.set_visible(False)
ax[0].set_ylabel('Leakage %')
leg = ax[1].get_legend()
h, l = ax[1].get_legend_handles_labels()
ax[1].legend(h[1:] + h[:1], l[1:] + l[:1], title='Sample', loc='lower right')
fs = 28
ax[0].text(0,0, 'A', fontsize=fs)
ax[1].text(0,0, 'B', fontsize=fs)
savefig('multi-spot leakage KDE 2panels.png')


Save

Per-channel mean


In [41]:
lk_cw.to_csv('results/Multi-spot - leakage coefficient mean per-ch KDE %s.csv' % bsearch_str)

Per-sample mean


In [42]:
lk_sw.to_csv('results/Multi-spot - leakage coefficient mean per-sample KDE %s.csv' % bsearch_str)

Global mean


In [43]:
'%.5f' % leakage_kde_wmean


Out[43]:
'0.03340'

In [44]:
with open('results/Multi-spot - leakage coefficient KDE wmean %s.csv' % bsearch_str, 'w') as f:
    f.write('%.5f' % leakage_kde_wmean)